# -*- coding: utf-8 -*-
import urllib2
import urllib
from bs4 import BeautifulSoup

import time
import re

#
import openpyxl

# Library used to detect the host operating system
import platform

#
import Queue
import threading
import gevent

# Detect the host operating system.
System_version = platform.system()


def is_windows(system_name):
    """Return True when *system_name* names a Windows platform.

    The comparison is case-insensitive: platform.system() reports
    'Windows' with a capital W, which a plain find('windows') never
    matches, so the Windows-only setup below used to be skipped.
    """
    return 'windows' in system_name.lower()


if is_windows(System_version):
    # Make blocking stdlib I/O cooperative so the gevent greenlets spawned
    # by the main section can overlap their downloads.
    from gevent import monkey
    monkey.patch_all()
    # Python 2 only: reload(sys) restores sys.setdefaultencoding so the
    # implicit str/unicode codec can be switched to UTF-8.
    import sys
    reload(sys)
    sys.setdefaultencoding('UTF-8')

# Rows collected by the scrapers (pa / pa2) and written out by xl().
xl_data = []

def pa(url):
    """Scrape one project-list page and append one row per project to xl_data.

    The first 14 <li> elements of the page are skipped.
    NOTE(review): presumably those are navigation entries rather than
    projects -- confirm against the page markup.

    Each appended row is a list whose first element is the title taken
    from the item's first <a> tag, followed by every run of digits found
    in the item's <div> elements whose first CSS class is 'item-num'.
    Fields 1, 2, 4, 5 and 6 of the row are echoed to stdout as days left,
    followers, supporters, percent reached and amount raised.

    Args:
        url: address of the list page to download.

    Raises:
        IndexError: when an item has no <a> tag, the title pattern does
            not match, or fewer than six numbers were collected.
    """
    response = urllib.urlopen(url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    # Query the document once instead of re-running find_all per iteration.
    items = soup.find_all('li')
    for item in items[14:]:
        anchor_html = item.find_all('a')[0].prettify().encode('utf-8')
        # Keep what lies between 't=' and ' c' in the anchor markup; the
        # surrounding double quotes stay part of the stored title.
        title = re.findall(r't=".*" c', anchor_html)[0][2:-2]
        data = [title]

        for div in item.find_all('div'):
            # A <div> without a class attribute must be skipped, not crash.
            classes = div.get('class') or []
            if not classes or classes[0] != 'item-num':
                continue
            content = div.prettify().encode('utf-8')
            content = content.replace('\n', '').replace(' ', '').replace('\t', '')
            data.extend(re.findall(r'\d\d*', content))

        print('项目名称: %s' % data[0])
        print('剩余天数：%s' % data[1])
        print('关注：%s' % data[2])
        print('支持：%s' % data[4])
        print('达成%s' % data[5] + '%')
        print('已筹集：%s' % data[6])
        print('--' * 20)

        xl_data.append(data)


def xl(path=r'C:\ttt3.xlsx'):
    """Write the rows collected in xl_data to the first sheet of a workbook.

    Row 1 receives the column headers; each entry of xl_data is written
    to the following rows, starting at row 2. The workbook is saved back
    to the same file.

    NOTE(review): the field indices used here (0, 1, 2, 4, 5, 6) match
    the rows built by pa(); rows built by pa2() carry a status string at
    index 1, so the headers would not line up for them -- confirm which
    scraper is meant to feed this export.

    Args:
        path: workbook to update in place. It must already exist because
            it is opened with openpyxl.load_workbook. Defaults to the
            location the script originally hard-coded.
    """
    wk = openpyxl.load_workbook(path)
    # First worksheet of the workbook (replaces the deprecated
    # get_sheet_names()/get_sheet_by_name() pair).
    sheet1 = wk.worksheets[0]

    # (column letter, header text, index of the field inside a row)
    layout = (
        ('A', '标题', 0),
        ('B', '剩余天数', 1),
        ('C', '关注', 2),
        ('D', '支持', 4),
        ('E', '达成(%)', 5),
        ('F', '已筹集', 6),
    )

    for column, header, _ in layout:
        sheet1['%s1' % column].value = header

    for row_number, row in enumerate(xl_data, 2):
        for column, _, field in layout:
            sheet1['%s%s' % (column, row_number)].value = row[field]

    print('save...')
    wk.save(path)
    print('end')
# Serialises appends to xl_data when pa2 runs concurrently. Defined at
# module level so pa2 also works when this file is imported; the main
# section rebinds the name with its own lock before starting any worker.
mutex = threading.Lock()

# Status labels, tested in this order; the first one found in an item wins.
_STATUS_LABELS = ('项目成功', '即将开始', '众筹中', '众筹成功')


def _strip_whitespace(markup):
    """Return *markup* with every newline, space and tab removed."""
    return markup.replace('\n', '').replace(' ', '').replace('\t', '')


def _digits(text):
    """Return every run of decimal digits in *text*, in order, as strings."""
    return re.findall(r'\d\d*', text)


def _parse_project(txt):
    """Build the 8-field row for one project from whitespace-free markup.

    Row layout:
        0 project name, 1 status, 2 days remaining, 3 followers,
        4 supporters, 5 percent reached, 6 amount raised, 7 start time.
    Fields the markup does not provide for a given status stay 'NA'.

    Returns None when *txt* contains none of the known status labels.
    Raises IndexError when a label is present but the expected markup
    around it is not.
    """
    status = None
    for label in _STATUS_LABELS:
        if txt.find(label) != -1:
            status = label
            break
    if status is None:
        return None

    data = ['NA'] * 8
    data[0] = str(re.findall(r'e=.*><a', txt)[0][2:-3])
    data[1] = status

    if status == '即将开始':
        numbers = _digits(txt)
        data[3] = numbers[2]
        # Drop the leading label (12 bytes once UTF-8 encoded) and the
        # trailing '</b'.
        data[7] = re.findall(r'开始时间.*</b', txt)[0][12:-3]
    elif status == '众筹中':
        numbers = _digits(re.findall(r'<b>剩余.*</strong>', txt)[0])
        # The deletions run one after the other, so this removes the
        # entries originally at positions 2 and 4.
        del numbers[2], numbers[3]
        for i in range(5):
            data[i + 2] = numbers[i]
    else:
        # '项目成功' and '众筹成功' share the same markup.
        numbers = _digits(re.findall(r'<em>关.*</strong>', txt)[0])
        # Sequential deletions: removes the entries originally at
        # positions 1 and 3.
        del numbers[1], numbers[2]
        for i in range(4):
            data[i + 3] = numbers[i]
    return data


def pa2(url):
    """Scrape one project-list page and append its projects to xl_data.

    Every <li> of the page is inspected; the first <div> inside it whose
    markup contains a known status label produces one row (see
    _parse_project for the layout). The row is printed field by field and
    appended to xl_data while holding the module-level mutex.

    To go through an HTTP proxy, install a urllib2 opener built with
    urllib2.ProxyHandler before calling this function.

    Args:
        url: address of the list page to download.

    Raises:
        IndexError: when an item carries a status label but not the
            markup the extraction patterns expect.
    """
    response = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    for item in soup.find_all('li'):
        for div in item.find_all('div'):
            txt = _strip_whitespace(div.prettify().encode('utf-8'))
            data = _parse_project(txt)
            if data is None:
                continue
            for field in data:
                print(field)
            # 'with' guarantees the lock is released if append raises.
            with mutex:
                xl_data.append(data)
            # One row per <li>: stop at the first matching <div>.
            break


if __name__ == '__main__':
    # Scrape list pages 1..19, two pages at a time through gevent (the
    # last, unpaired page is fetched directly), then print a short summary.
    LIST_URL = 'http://zc.suning.com/project/browseList.htm?c=&t=&s=&keyWords=&pageNumber='

    q = Queue.Queue(100)
    for page in xrange(1, 20):
        q.put(page)

    print(q.qsize())

    # Lock used by pa2 to guard appends to xl_data.
    mutex = threading.Lock()

    # Wall-clock time: time.clock() reports CPU time on Unix, which says
    # nothing about how long a network-bound run took.
    start = time.time()
    while q.qsize() != 0:
        if q.qsize() < 2:
            pa2(LIST_URL + '%s' % q.get())
        else:
            print('size: %s' % q.qsize())
            first = gevent.spawn(pa2, LIST_URL + '%s' % q.get())
            second = gevent.spawn(pa2, LIST_URL + '%s' % q.get())
            # Wait at most 10 seconds for the pair before moving on.
            gevent.joinall([first, second], timeout=10)
    end = time.time()

    print(len(xl_data))
    # Nothing may have been collected (e.g. every request failed).
    if xl_data:
        print(xl_data[-1][0])
        print(xl_data[0][0])
    print('end')
    print("read: %f s" % (end - start))

    # Call xl() here to export the collected rows to the workbook.
    #xl()